/*====================================================================================
A program that computes the Thomsen Estimator, but with a logit specification 
and implementation of tetrachoric approximation. See Thomsen (1987) for more detail. 
Program written by Won-ho Park, University of Michigan
Copyright Won-ho Park, Aug. 2002 
=======================================================================================*/

* The input argument should look like this: 
* ecol1 var1 var2 var3 var4
* where var1 and var3 are the party votes of interest
* and var2 and var4 are the total number of votes.  
* The variables should all be integers, that is, they should be 
* vote counts, not fractions. 
*
* Also note that the dependent variable (Election 2) is var3
* above.  Thus, if one wants to run a model other than a 
* voter transfer model, then one should specify the dependent 
* variable as var3 above. 


*mat drop _all
program define ecol1, rclass byable(recall)
* Thomsen ecological-inference estimator with a logit transformation and
* the tetrachoric approximation.
*
* Syntax:  ecol1 var1 var2 var3 var4 [if] [in]
*   var1 = party votes, election 1      var2 = total votes, election 1
*   var3 = party votes, election 2      var4 = total votes, election 2
*
* Stored results:
*   r(core)   estimated joint proportion voting for the party in both elections
*   r(rho)    weighted correlation of the two logit-transformed vote shares
*   r(n)      number of observations used by correlate
*   r(votes)  1 x 4 row of column sums of var1-var4 over the estimation sample
*   r(out)    2 x 3 matrix; row 1 = loyalty (point, lower, upper),
*             row 2 = defection evaluated at those same three loyalty values,
*             so its column 2 is the UPPER defection bound and column 3 the LOWER
*   r(Props)  2 x 2 table of estimated joint proportions
*   r(Pops)   r(Props) scaled by the election-2 vote total
  version 7.0
  syntax varlist(min=4 max=4 numeric) [if] [in]
  marksample touse , strok
  tokenize `varlist'
  local vote1 `1'
  local vote2 `2'
  local vote3 `3'
  local vote4 `4'

**********************************
* Declare temporary scalars, matrices and variables
  tempname natsum nat_v1 nat_v2
  tempname corr n core Loyalty zhi zlow rhi rlow loy_hi loy_lo k
  tempname d_core m_natv2 defect Defect de_lo de_hi output
  tempname props pops
  tempvar lx1 lx2

***********************************
* National (aggregate) proportions over the estimation sample
  quietly tabstat `vote1' `vote2' `vote3' `vote4' if `touse', s(sum) save
  mat `natsum' = r(StatTot)
  scalar `nat_v1' = `natsum'[1,1]/`natsum'[1,2]
  scalar `nat_v2' = `natsum'[1,3]/`natsum'[1,4]

**************************************
* Non-linear transformation: logit of each unit's party vote share.
* Units where the party got zero votes or every vote produce a missing
* logit; correlate then leaves them out, although they still count in
* the national sums above.
  qui gen `lx1' = log(`vote1'/(`vote2'-`vote1')) if `touse'
  qui gen `lx2' = log(`vote3'/(`vote4'-`vote3')) if `touse'

* Correlation of the logits, weighted by election-2 turnout
  quietly correlate `lx1' `lx2' [aw=`vote4'] if `touse'
  scalar `corr' = r(rho)
  scalar `n'    = r(N)

* Tetrachoric formula for the joint proportion.
* The closed form divides by 4*rho, so rho == 0 would yield 0/0 (missing).
* Its limit as rho -> 0 is nat_v1*nat_v2 (independence); cond() selects
* that limit in the zero case and the closed form otherwise.
  scalar `k' = sqrt((1+2*`corr'*`nat_v1'+2*`corr'*`nat_v2'-`corr')^2-8*`corr'*(1+`corr')*`nat_v1'*`nat_v2')
  scalar `core' = cond(`corr'==0, `nat_v1'*`nat_v2', (1+2*`corr'*`nat_v1'+2*`corr'*`nat_v2'-`corr'-`k')/(4*`corr'))
  scalar `Loyalty' = `core'/`nat_v1'

* Confidence bounds: Fisher z-transform of rho, +/- 1.96 standard errors
* (se = 1/sqrt(n-3)), back-transformed and pushed through the same formula.
* With n <= 3 the standard error is undefined and the bounds are missing.
  scalar `zhi'  = log((1+`corr')/(1-`corr'))/2 + 1.96/sqrt(`n'-3)
  scalar `zlow' = log((1+`corr')/(1-`corr'))/2 - 1.96/sqrt(`n'-3)
  scalar `rhi'  = (exp(2*`zhi' )-1)/(exp(2*`zhi' )+1)
  scalar `rlow' = (exp(2*`zlow')-1)/(exp(2*`zlow')+1)

  scalar `k' = sqrt((1+2*`rhi'*`nat_v1'+2*`rhi'*`nat_v2'-`rhi')^2-8*`rhi'*(1+`rhi')*`nat_v1'*`nat_v2')
  scalar `loy_hi' = cond(`rhi'==0, `nat_v1'*`nat_v2', (1+2*`rhi'*`nat_v1'+2*`rhi'*`nat_v2'-`rhi'-`k')/(4*`rhi'))/`nat_v1'

  scalar `k' = sqrt((1+2*`rlow'*`nat_v1'+2*`rlow'*`nat_v2'-`rlow')^2-8*`rlow'*(1+`rlow')*`nat_v1'*`nat_v2')
  scalar `loy_lo' = cond(`rlow'==0, `nat_v1'*`nat_v2', (1+2*`rlow'*`nat_v1'+2*`rlow'*`nat_v2'-`rlow'-`k')/(4*`rlow'))/`nat_v1'

* The defection part: share of election-1 non-supporters who vote for
* the party in election 2, at the point estimate and at both loyalty bounds.
  mat `d_core'  = (`Loyalty', `loy_lo', `loy_hi')*`nat_v1'
  mat `m_natv2' = (`nat_v2', `nat_v2', `nat_v2')
  mat `defect'  = (`m_natv2'-`d_core')/(1-`nat_v1')
  sca `Defect'  = `defect'[1,1]
* Higher loyalty implies lower defection, hence the swapped columns here
  sca `de_lo'   = `defect'[1,3]
  sca `de_hi'   = `defect'[1,2]
  mat `output'  = (`Loyalty', `loy_lo', `loy_hi')
  mat `output'  = `output' \ `defect'

* 2 x 2 table of joint proportions and the matching vote counts
  mat `props' = (`core', `nat_v2'-`core')
  mat `props' = `props' \ (`nat_v1'-`core', 1-`nat_v2'-`nat_v1'+`core')
  mat `pops'  = `props' * `natsum'[1,4]
  return matrix Props `props'
  return matrix Pops `pops'

* Now get the output
  di _newline "{it:Thomsen Estimates, Logit and Tetrachoric Approx.}"
  di in text "{hline 19}{c TT}{hline 45}"
  di "`vote3'""<---""`vote1'"_col(20)"{c |}" _col(25) "Rates" _col(40)"[95% Conf. Interval]"
  di "{hline 19}{c +}{hline 45}"
  di "{txt}   Loyalty Rate" _col(20)"{c |}{res}" _col(25) %7.5f `Loyalty' _col(40) "["%7.5f `loy_lo' " ,     " %7.5f `loy_hi' "]"
  di "{txt} Defection Rate" _col(20)"{c |}{res}" _col(25) %7.5f `Defect'  _col(40) "["%7.5f `de_lo'  " ,     "  %7.5f `de_hi' "]"
  di "{txt}    Correlation" _col(20)"{c |}{res}" _col(25) %7.5f `corr'    _col(40) "["%7.5f `rlow'   " ,     "   %7.5f `rhi' "]"
  di in text "{hline 19}{c BT}{hline 45}"
  di "{txt}n = {res}" `n'

* Now finally store the results in r().
* return matrix moves the matrix, so natsum is handed over only after
* its last use above.
  return scalar core = `core'
  return scalar rho  = `corr'
  return matrix votes `natsum'
  return scalar n = `n'
  return matrix out `output'
end
